#!/bin/bash
# This is a script for evaluating the model for code generation.
##################################################################################################
### **Default parameters**
set -euo pipefail # abort on command failure, unset variable, or failed pipeline stage

# Evaluation modes to run; the refinement modes are currently disabled.
declare -a MODES=(
  # 'code_refinement_with_instructions_fc_cs'
  # 'code_refinement_with_instructions_fc_ct'
  'code_generation_cs'
  'code_generation_ct'
)

# Source of the generated code. Components: "base", "our"
CODE_GEN_MODE='base'

# Training variant used when CODE_GEN_MODE is "our". Components: "SFT", "RL"
OUR_MODE='SFT'

# What to use for task_id?
# Components: "True" (use out_task_id), "False" (use in_task_id)
USE_NONE_LIST='False'

# Benchmarks to evaluate. Components: "humaneval", "mbpp"
declare -a DATASETS=(
  'humaneval'
  'mbpp'
)

# Kind of contract test cases. Components: "grammar", "direct"
CONTRACT_TEST_CASE_TYPE='grammar'

# Code generation model names (add further models here).
# Components: "DeepSeek", "Mistral", "Qwen", "Phi"
declare -a CODE_GENERATION_MODEL_NAMES=(
  'DeepSeek-R1-Distill-Qwen-14B'
  # 'Mistral-Nemo-Base-2407'
  # 'Qwen3-14B'
  # 'Phi-4-reasoning-plus'
  # 'o4-mini'
)

# Map code generation model name -> contract test model directory basename
declare -A CONTRACT_TEST_MODEL_MAP
CONTRACT_TEST_MODEL_MAP['DeepSeek-R1-Distill-Qwen-14B']='DeepSeek-RL'
CONTRACT_TEST_MODEL_MAP['Mistral-Nemo-Base-2407']='Mistral-RL'
CONTRACT_TEST_MODEL_MAP['o4-mini']='o4-mini'

##################################################################################################
# Main evaluation driver.
# For every dataset x mode x model combination configured above, resolve the
# generated-code file, the functionality dataset and the contract test-case
# file, create the output directory, and run evaluation_code_generation.py.
# All paths are relative to the current working directory.

# Reject unknown generation modes up front. Without this check a typo would be
# treated as "our" when choosing the input path but as "base" when choosing the
# output directory, silently mixing results.
case "$CODE_GEN_MODE" in
  base|our) ;;
  *)
    echo "Invalid CODE_GEN_MODE: $CODE_GEN_MODE (expected \"base\" or \"our\")" >&2
    exit 1
    ;;
esac

for DATASET in "${DATASETS[@]}"; do

  # Root directory holding the generated code for this dataset.
  if [[ "$CODE_GEN_MODE" == "base" ]]; then
    CODE_GENERATION_MODEL_PATH_TAG="../../code/output_base/${DATASET}/inference"
  else
    CODE_GENERATION_MODEL_PATH_TAG="../../code/output_our/${DATASET}/inference"
  fi

  # Functionality (EvalPlus) dataset file for this benchmark.
  case "$DATASET" in
    humaneval)
      FUNCTIONALITY_DATASET_PATH="../../data/evalplus-0.1.1/HumanEvalPlus.jsonl"
      ;;
    mbpp)
      FUNCTIONALITY_DATASET_PATH="../../data/mbppplus-0.2.0/MbppPlus.jsonl"
      ;;
    *)
      echo "Invalid DATASET: $DATASET" >&2
      exit 1
      ;;
  esac

  # Contract test cases depend only on the dataset and the test-case type,
  # so resolve them once per dataset rather than once per model.
  if [[ "$CONTRACT_TEST_CASE_TYPE" == "grammar" ]]; then
    # grammar test case
    CONTRACTS_DATASET_PATH="../../data/code_generation/${DATASET}/CODE_GENERATION_CT/o4-mini/contract_check_results_total_assertion_only.json"
  else
    # direct test case
    CONTRACTS_DATASET_PATH="../../code/evaluation_test_case_pass_k/${DATASET}/pre_filtering/multi_assert_specification/o4-mini/o4-mini_all_results.json"
  fi

  OUTPUT_PATH="../../code/evaluation_code_generation/${DATASET}/"

  echo "------------------------------------------------------------"
  for MODE in "${MODES[@]}"; do
    for MODEL_NAME in "${CODE_GENERATION_MODEL_NAMES[@]}"; do

      # Input file and output directory differ between the base models and
      # our fine-tuned models (the latter are tagged with OUR_MODE).
      if [[ "$CODE_GEN_MODE" == "base" ]]; then
        CODE_GENERATION_MODEL_PATH="${CODE_GENERATION_MODEL_PATH_TAG}/${MODE}/${MODEL_NAME}/generated_step_all.json"
        OUT_DIR="${OUTPUT_PATH}${MODE}-${CONTRACT_TEST_CASE_TYPE}/${MODEL_NAME}/"
      else
        CODE_GENERATION_MODEL_PATH="${CODE_GENERATION_MODEL_PATH_TAG}/${MODE}/${MODEL_NAME}/${DATASET}_code_gen_ct_${OUR_MODE}@1.jsonl"
        OUT_DIR="${OUTPUT_PATH}${MODE}-${CONTRACT_TEST_CASE_TYPE}/${MODEL_NAME}-${OUR_MODE}/"
      fi

      mkdir -p "$OUT_DIR"
      echo "→ MODE=$MODE  DATASET=$DATASET "
      echo "→ MODEL_NAME: $MODEL_NAME"
      echo "=== Evaluating: $CODE_GENERATION_MODEL_PATH ==="
      echo "    → output_dir: $OUT_DIR"
      echo "    → modeldir : $CODE_GENERATION_MODEL_PATH"

      # Under `set -e` a failing evaluation aborts the whole script.
      python ../../code/utils/evaluation_code_generation.py \
        --code_generation_model_path "$CODE_GENERATION_MODEL_PATH" \
        --code_generation_model_name "$MODEL_NAME" \
        --functionality_dataset_path "$FUNCTIONALITY_DATASET_PATH" \
        --contracts_dataset_path "$CONTRACTS_DATASET_PATH" \
        --output_path "$OUT_DIR" \
        --mode "$MODE" \
        --use_None_list "$USE_NONE_LIST" \
        --contract_test_case_type "$CONTRACT_TEST_CASE_TYPE"
    done
  done
done

# Printed once, after every combination has completed. printf is used because
# bash's echo does not interpret "\n" without -e.
printf '\n✅ All evaluations finished.\n'